1 Sheet 1

1.1 What is the relationship between population and IGF revenue performance patterns?

# Descriptive statistics
# Summarise the Population column with skim(): missingness, mean, sd, quantiles and a mini histogram.
Cleaned_AMA_Data %>% skim(Population)
Data summary
Name Piped data
Number of rows 9
Number of columns 76
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Population 0 1 1613690 756390.3 284124 1871647 1936836 2036889 2138833 ▂▁▁▁▇
# Summarise the IGF column with skim(): missingness, mean, sd, quantiles and a mini histogram.
Cleaned_AMA_Data %>% skim(IGF)
Data summary
Name Piped data
Number of rows 9
Number of columns 76
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
IGF 0 1 31429511 15745039 14395782 15535894 32840336 40072210 55200507 ▇▁▃▂▃
# Histograms
# Ten-bin histograms of Population and IGF with comma-formatted x-axis labels.
Cleaned_AMA_Data %>%
  ggplot(aes(Population)) +
  geom_histogram(bins = 10, colour = "black", fill = "dodgerblue") +
  scale_x_continuous(labels = comma) +
  ggtitle("Distribution of Population") +
  xlab("Population")

Cleaned_AMA_Data %>%
  ggplot(aes(IGF)) +
  geom_histogram(bins = 10, colour = "black", fill = "dodgerblue") +
  scale_x_continuous(labels = comma) +
  ggtitle("Distribution of IGF Revenue") +
  xlab("IGF Revenue")

# Growth Rate (Percentage)
# Row-over-row percentage change; the first row has no predecessor, so it is NA.
# NOTE(review): assumes rows are already ordered by Year — confirm.
Cleaned_AMA_Data <- mutate(
  Cleaned_AMA_Data,
  Population_Growth_Rate = c(NA, diff(Population) / head(Population, -1) * 100),
  IGF_Growth_Rate = c(NA, diff(IGF) / head(IGF, -1) * 100)
)

# Plot of Trends

# Population by year: a line with highlighted points and comma-formatted y-axis.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year, y = Population)) +
  geom_line() +
  geom_point(colour = "dodgerblue") +
  scale_y_continuous(labels = comma) +
  labs(title = "Population Trend", x = "Year", y = "Population")

# IGF by year, drawn the same way.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year, y = IGF)) +
  geom_line() +
  geom_point(colour = "dodgerblue") +
  scale_y_continuous(labels = comma) +
  labs(title = "IGF Trend", x = "Year", y = "IGF")

# Population and IGF over time on one shared y-axis; the colour legend ("Type")
# distinguishes the two series.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Population, colour = "Population")) +
  geom_point(aes(y = Population, colour = "Population")) +
  geom_line(aes(y = IGF, colour = "IGF")) +
  geom_point(aes(y = IGF, colour = "IGF")) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Population vs. IGF Revenue",
    x = "Year",
    y = "Amount/Population",
    colour = "Type"
  )

# Growth rate plots
# Population and IGF growth rates by year, with a dashed red reference line at zero
# drawn on top of the series.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Population_Growth_Rate, colour = "Population Growth")) +
  geom_point(aes(y = Population_Growth_Rate, colour = "Population Growth")) +
  geom_line(aes(y = IGF_Growth_Rate, colour = "IGF Growth")) +
  geom_point(aes(y = IGF_Growth_Rate, colour = "IGF Growth")) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
  # scale = 1: the values are already expressed in percent
  scale_y_continuous(labels = percent_format(scale = 1)) +
  labs(
    title = "Population Growth vs. IGF Growth",
    x = "Year",
    y = "Growth Rate (%)",
    colour = "Type"
  )

The histograms show uneven distributions of population and IGF revenue. The population histogram reveals two distinct clusters. The trend plots clearly show that the trend of IGF revenue (which experienced significant changes) is not directly linked to the trend of population (which remained stable).

1.1.1 Regression Analysis

# Simple linear regression of IGF on Population, fitted with lm().
# summary() prints the coefficient table, R-squared and the overall F-test.
mod1 <- lm(IGF ~ Population, data = Cleaned_AMA_Data)
summary(mod1)
## 
## Call:
## lm(formula = IGF ~ Population, data = Cleaned_AMA_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -22373920  -1860766   -820315   5514799  19515595 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)
## (Intercept) 13014136.12 11608703.30   1.121    0.299
## Population        11.41        6.58   1.734    0.126
## 
## Residual standard error: 14080000 on 7 degrees of freedom
## Multiple R-squared:  0.3006, Adjusted R-squared:  0.2006 
## F-statistic: 3.008 on 1 and 7 DF,  p-value: 0.1264
# Scatter of IGF against Population with the fitted least-squares line and its
# confidence band (se = TRUE).
ggplot(Cleaned_AMA_Data, aes(Population, IGF)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Linear Relationship between Population and IGF Revenue") +
  xlab("Population") +
  ylab("IGF Revenue (Ghana Cedis)")

The F-statistic and its associated p-value (0.1264) indicate that there is no statistically significant relationship between population and IGF revenue. The R-squared is 0.3006, which means only 30.06% of the variation in IGF revenue can be explained by population even though this relationship is not statistically significant.

  • Checking Regression Assumptions
# Scatter Plot
# Raw Population vs. IGF points, used to eyeball linearity.
Cleaned_AMA_Data %>%
  ggplot(aes(Population, IGF)) +
  geom_point() +
  ggtitle("Population vs. IGF Revenue") +
  xlab("Population") +
  ylab("IGF Revenue")

# Residual diagnostics for mod1
# 1. Residuals against fitted values, with a dashed zero reference line.
data.frame(residuals = residuals(mod1), fitted = fitted(mod1)) %>%
  ggplot(aes(fitted, residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
  ggtitle("Residuals vs. Fitted") +
  xlab("Fitted Values") +
  ylab("Residuals")

# 2. Ten-bin histogram of the residuals.
data.frame(residuals = residuals(mod1)) %>%
  ggplot(aes(residuals)) +
  geom_histogram(bins = 10, colour = "black", fill = "skyblue") +
  ggtitle("Histogram of Residuals") +
  xlab("Residuals")

# 3. Normal Q-Q plot of the residuals with a reference line.
data.frame(residuals = residuals(mod1)) %>%
  ggplot(aes(sample = residuals)) +
  stat_qq() +
  stat_qq_line() +
  ggtitle("Q-Q Plot of Residuals")

# Autocorrelation (Durbin-Watson Test)
# Tests mod1's residuals against the alternative of positive autocorrelation.
# NOTE(review): the test depends on row order — presumably rows are sorted by Year; confirm.
dwtest(mod1)
## 
##  Durbin-Watson test
## 
## data:  mod1
## DW = 1.3269, p-value = 0.07671
## alternative hypothesis: true autocorrelation is greater than 0
# Homoscedasticity (Breusch-Pagan Test)
# Studentized Breusch-Pagan test on mod1; a small p-value would indicate non-constant residual variance.
bptest(mod1)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod1
## BP = 2.5693, df = 1, p-value = 0.109

The scatter plot shows a positive but non-linear relationship: as population increases, IGF revenue tends to increase as well. The residual plots show slight violations of the linearity and normality assumptions. The Durbin-Watson test is not significant (p = 0.0767), meaning there is no evidence of autocorrelation at the 5% level, and the Breusch-Pagan test (p = 0.109) gives no evidence against homoscedasticity.

  • Transformations
# Transformed Models
# Log-log model using the Ln_IGF and Ln_Pop columns of Cleaned_AMA_Data.
summary(lm(Ln_IGF ~ Ln_Pop, data = Cleaned_AMA_Data))
## 
## Call:
## lm(formula = Ln_IGF ~ Ln_Pop, data = Cleaned_AMA_Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81443 -0.05390  0.01916  0.21476  0.51471 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  11.5295     2.5857   4.459  0.00294 **
## Ln_Pop        0.3987     0.1834   2.174  0.06623 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4424 on 7 degrees of freedom
## Multiple R-squared:  0.403,  Adjusted R-squared:  0.3177 
## F-statistic: 4.726 on 1 and 7 DF,  p-value: 0.06623
# Square-root versions of Population and IGF, stored as new columns, and the
# regression fitted on them.
Cleaned_AMA_Data[["Sqrt_Population"]] <- sqrt(Cleaned_AMA_Data[["Population"]])
Cleaned_AMA_Data[["Sqrt_IGF"]] <- sqrt(Cleaned_AMA_Data[["IGF"]])
summary(lm(Sqrt_IGF ~ Sqrt_Population, data = Cleaned_AMA_Data))
## 
## Call:
## lm(formula = Sqrt_IGF ~ Sqrt_Population, data = Cleaned_AMA_Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2100.94  -152.82   -23.22   543.70  1565.49 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     2780.967   1423.457   1.954   0.0917 .
## Sqrt_Population    2.188      1.121   1.952   0.0919 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1225 on 7 degrees of freedom
## Multiple R-squared:  0.3525, Adjusted R-squared:   0.26 
## F-statistic: 3.811 on 1 and 7 DF,  p-value: 0.09187
# Scatter Plots (Transformed Data)
# Log-log scatter.
Cleaned_AMA_Data %>%
  ggplot(aes(Ln_Pop, Ln_IGF)) +
  geom_point() +
  ggtitle("Log(Population) vs. Log(IGF Revenue)") +
  xlab("Log(Population)") +
  ylab("Log(IGF Revenue)")

# Square-root scatter.
Cleaned_AMA_Data %>%
  ggplot(aes(Sqrt_Population, Sqrt_IGF)) +
  geom_point() +
  ggtitle("Sqrt(Population) vs. Sqrt(IGF Revenue)") +
  xlab("Sqrt(Population)") +
  ylab("Sqrt(IGF Revenue)")

Even after log and square-root transformations, we did not find statistically significant relationships between population and IGF revenue at the 5% level (p = 0.066 for the log-log model and p = 0.092 for the square-root model).

Therefore, the analysis found no statistically significant relationship between population size and IGF revenue in this dataset. The small sample size (n = 9) limits the power to detect significant effects and may have made any underlying pattern hard to find. Unmeasured factors that are missing from the model may also contribute to this result.

1.2 What is the relationship between population and DACF revenue performance patterns?

# Descriptive statistics: skim() summary of the Population column.
Cleaned_AMA_Data %>% skim(Population)
Data summary
Name Piped data
Number of rows 9
Number of columns 80
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Population 0 1 1613690 756390.3 284124 1871647 1936836 2036889 2138833 ▂▁▁▁▇
# Descriptive statistics: skim() summary of the DACF column.
Cleaned_AMA_Data %>% skim(DACF)
Data summary
Name Piped data
Number of rows 9
Number of columns 80
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DACF 0 1 5638029 2694114 1685390 3607546 5079623 8043158 9497586 ▂▇▂▂▇
# Histograms
# Ten-bin histograms of Population and DACF. The x-axes use comma-formatted
# labels (as the Population/IGF histograms do) so large values are not printed
# in scientific notation.
ggplot(Cleaned_AMA_Data, aes(x = Population)) +
  geom_histogram(bins = 10, fill = "dodgerblue", color = "black") +
  labs(title = "Distribution of Population", x = "Population") +
  scale_x_continuous(labels = comma)

ggplot(Cleaned_AMA_Data, aes(x = DACF)) +
  geom_histogram(bins = 10, fill = "dodgerblue", color = "black") +
  labs(title = "Distribution of DACF Revenue", x = "DACF Revenue") +
  scale_x_continuous(labels = comma)

# Growth rates
# Row-over-row percentage change in Population and DACF; the first row is NA.
# NOTE(review): assumes rows are already ordered by Year — confirm.
Cleaned_AMA_Data <- mutate(
  Cleaned_AMA_Data,
  Population_Growth_Rate = c(NA, diff(Population) / head(Population, -1) * 100),
  DACF_Growth_Rate = c(NA, diff(DACF) / head(DACF, -1) * 100)
)




# Plotting Trends

# Population by year: a line with highlighted points and comma-formatted y-axis.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year, y = Population)) +
  geom_line() +
  geom_point(colour = "dodgerblue") +
  scale_y_continuous(labels = comma) +
  labs(title = "Population Trend", x = "Year", y = "Population")

# DACF by year: a line with highlighted points and comma-formatted y-axis.
# The y-axis is labelled "DACF" to match the plotted variable.
ggplot(Cleaned_AMA_Data, aes(x = Year)) +
  geom_line(aes(y = DACF)) +
  geom_point(aes(y = DACF), color = "dodgerblue") +
  labs(title = "DACF Trend", x = "Year", y = "DACF") +
  scale_y_continuous(labels = comma)

# Population and DACF over time on one shared y-axis; the colour legend ("Type")
# distinguishes the two series.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Population, colour = "Population")) +
  geom_point(aes(y = Population, colour = "Population")) +
  geom_line(aes(y = DACF, colour = "DACF")) +
  geom_point(aes(y = DACF, colour = "DACF")) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Population vs. DACF Revenue",
    x = "Year",
    y = "Amount/Population",
    colour = "Type"
  )

# Plotting Growth Rates
# Population and DACF growth rates by year, with a dashed red reference line at zero.
# The y-axis ticks are formatted as percentages, matching the Population/IGF
# growth plot; scale = 1 because the values are already expressed in percent.
ggplot(Cleaned_AMA_Data, aes(x = Year)) +
  geom_line(aes(y = Population_Growth_Rate, color = "Population Growth")) +
  geom_point(aes(y = Population_Growth_Rate, color = "Population Growth")) +
  geom_line(aes(y = DACF_Growth_Rate, color = "DACF Growth")) +
  geom_point(aes(y = DACF_Growth_Rate, color = "DACF Growth")) +
  labs(title = "Population Growth vs. DACF Growth", x = "Year", y = "Growth Rate (%)", color = "Type") +
  scale_y_continuous(labels = percent_format(scale = 1)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red")

The histograms show uneven distributions of population and DACF revenue. The population histogram reveals two distinct clusters. The trend plots clearly show that the trend of DACF revenue (which experienced significant changes) is not directly linked to the trend of population (which remained stable).

1.2.1 Regression Analysis

# Simple linear regression of DACF on Population, fitted with lm().
# summary() prints the coefficient table, R-squared and the overall F-test.
mod2 <- lm(DACF ~ Population, data = Cleaned_AMA_Data)
summary(mod2)
## 
## Call:
## lm(formula = DACF ~ Population, data = Cleaned_AMA_Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4217574 -1710429   449791  2123625  3527669 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)
## (Intercept) 3980686.267 2274203.376   1.750    0.124
## Population        1.027       1.289   0.797    0.452
## 
## Residual standard error: 2758000 on 7 degrees of freedom
## Multiple R-squared:  0.08315,    Adjusted R-squared:  -0.04783 
## F-statistic: 0.6348 on 1 and 7 DF,  p-value: 0.4518
# Scatter of DACF against Population with the fitted least-squares line and its
# confidence band (se = TRUE).
ggplot(Cleaned_AMA_Data, aes(Population, DACF)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Linear Relationship between Population and DACF Revenue") +
  xlab("Population") +
  ylab("DACF Revenue (Ghana Cedis)")

The linear regression results indicated no statistically significant relationship (R-squared = 0.08315, p = 0.4518). Given this model, it cannot be concluded that changes in population reliably predict changes in DACF revenue performance, and any observed pattern could well be due to chance. The estimated coefficient for population is 1.027.

  • Checking Regression Assumptions
 #Scatter Plot 
# Raw Population vs. DACF points, used to eyeball linearity.
Cleaned_AMA_Data %>%
  ggplot(aes(Population, DACF)) +
  geom_point() +
  ggtitle("Population vs. DACF Revenue") +
  xlab("Population") +
  ylab("DACF Revenue")

# Residual diagnostics for mod2
# 1. Residuals against fitted values, with a dashed zero reference line.
data.frame(residuals = residuals(mod2), fitted = fitted(mod2)) %>%
  ggplot(aes(fitted, residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
  ggtitle("Residuals vs. Fitted") +
  xlab("Fitted Values") +
  ylab("Residuals")

# 2. Ten-bin histogram of the residuals.
data.frame(residuals = residuals(mod2)) %>%
  ggplot(aes(residuals)) +
  geom_histogram(bins = 10, colour = "black", fill = "skyblue") +
  ggtitle("Histogram of Residuals") +
  xlab("Residuals")

# 3. Normal Q-Q plot of the residuals with a reference line.
data.frame(residuals = residuals(mod2)) %>%
  ggplot(aes(sample = residuals)) +
  stat_qq() +
  stat_qq_line() +
  ggtitle("Q-Q Plot of Residuals ")

# Autocorrelation
# Durbin-Watson test on mod2's residuals against the alternative of positive autocorrelation.
# NOTE(review): the test depends on row order — presumably rows are sorted by Year; confirm.
dwtest(mod2)
## 
##  Durbin-Watson test
## 
## data:  mod2
## DW = 2.3616, p-value = 0.609
## alternative hypothesis: true autocorrelation is greater than 0
# Homoscedasticity (Constant Variance of Residuals)
# Studentized Breusch-Pagan test on mod2; a small p-value would indicate non-constant residual variance.

bptest(mod2)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod2
## BP = 1.8931, df = 1, p-value = 0.1689
# Multicollinearity
#simple linear regression with one predictor(population), multicollinearity is not an issue.


# Multivariate Normality

# With a single predictor (Population), multivariate normality reduces to normality of the residuals, which the histogram and Q-Q plot above assess.

The scatter plot shows a positive but non-linear relationship: as population increases, DACF revenue tends to increase as well. The histogram of residuals shows a potential violation of the normality assumption. The Durbin-Watson test revealed no autocorrelation, and the Breusch-Pagan test shows homoscedasticity.

  • Transformations
# Transformed Models
# Log-log regression of DACF on Population; the logs are taken inside the formula.
summary(lm(log(DACF) ~ log(Population), data = Cleaned_AMA_Data))
# 
# Call:
# lm(formula = log(DACF) ~ log(Population), data = Cleaned_AMA_Data)
# 
# Residuals:
#     Min      1Q  Median      3Q     Max 
# -1.1254 -0.1930  0.1834  0.4365  0.5998 
# 
# Coefficients:
#                 Estimate Std. Error t value Pr(>|t|)   
# (Intercept)      13.8620     3.4404   4.029    0.005 **
# log(Population)   0.1109     0.2440   0.454    0.663   
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 0.5887 on 7 degrees of freedom
# Multiple R-squared:  0.02863, Adjusted R-squared:  -0.1101 
# F-statistic: 0.2063 on 1 and 7 DF,  p-value: 0.6634
# Square-root regression of DACF on Population; the roots are taken inside the formula.
summary(lm(sqrt(DACF) ~ sqrt(Population), data = Cleaned_AMA_Data))
# 
# Call:
# lm(formula = sqrt(DACF) ~ sqrt(Population), data = Cleaned_AMA_Data)
# 
# Residuals:
#     Min      1Q  Median      3Q     Max 
# -1063.9  -300.9   170.3   471.8   711.1 
# 
# Coefficients:
#                   Estimate Std. Error t value Pr(>|t|)  
# (Intercept)      1865.5189   717.9963   2.598   0.0355 *
# sqrt(Population)    0.3630     0.5652   0.642   0.5412  
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 618.1 on 7 degrees of freedom
# Multiple R-squared:  0.05564, Adjusted R-squared:  -0.07926 
# F-statistic: 0.4125 on 1 and 7 DF,  p-value: 0.5412
# Scatter Plots (Transformed Data)
# Log-log scatter of Population against DACF.
Cleaned_AMA_Data %>%
  ggplot(aes(log(Population), log(DACF))) +
  geom_point() +
  ggtitle("Log(Population) vs. Log(DACF Revenue)") +
  xlab("Log(Population)") +
  ylab("Log(DACF Revenue)")

# Square-root scatter of Population against DACF. The aesthetics apply sqrt()
# so the plotted values match the "Sqrt" title and axis labels.
ggplot(Cleaned_AMA_Data, aes(x = sqrt(Population), y = sqrt(DACF))) +
  geom_point() +
  labs(title = "Sqrt(Population) vs. Sqrt(DACF Revenue)",
       x = "Sqrt(Population)", y = "Sqrt(DACF Revenue)")

The linear regression results earlier indicated that the relationship between population size and DACF revenue is not statistically significant. After trying log and square-root transformations of the regression model, we still did not find statistically significant relationships between population and DACF revenue. Therefore, in this data, population size in itself does not appear to be a primary driver of DACF revenue.

1.3 What is the relationship between population, recurrent and capital expenditure?

The recurrent expenditure values are not available.

1.3.1 Regression Results

# Simple linear regression of Capital_Expenditure on Population, fitted with lm().
# summary() prints the coefficient table, R-squared and the overall F-test.
mod3 <- lm(Capital_Expenditure ~ Population, data = Cleaned_AMA_Data)
summary(mod3)
## 
## Call:
## lm(formula = Capital_Expenditure ~ Population, data = Cleaned_AMA_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -15480921  -4324721   -976274   9154924  11443212 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)
## (Intercept) -230353.553 8235549.845  -0.028    0.978
## Population        8.073       4.668   1.729    0.127
## 
## Residual standard error: 9987000 on 7 degrees of freedom
## Multiple R-squared:  0.2994, Adjusted R-squared:  0.1993 
## F-statistic: 2.991 on 1 and 7 DF,  p-value: 0.1274
# Scatter of Capital_Expenditure against Population with the fitted least-squares
# line and its confidence band (se = TRUE). The title wording matches the
# parallel IGF and DACF regression plots.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Population, y = Capital_Expenditure)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Population",
    y = "Capital Expenditure",
    title = "Linear Relationship between Population and Capital Expenditure"
  ) +
  scale_y_continuous(labels = scales::comma)

From the linear regression result, the F-statistic and its associated p-value (0.1274) are not statistically significant. Therefore, the analysis found no statistically significant relationship between population and capital expenditure. Given this model, it cannot be concluded that changes in population reliably predict changes in capital expenditure, and any observed pattern could well be due to chance.

For every one-unit increase in population (one additional person), capital expenditure is estimated to increase by 8.073 Ghana cedis. The Multiple R-squared (0.2994) indicates that 29.94% of the variation in capital expenditure can be explained by the model (population).

  • Checking Regression Assumptions
 #Scatter Plot 
# Raw Population vs. Capital_Expenditure points, used to eyeball linearity.
Cleaned_AMA_Data %>%
  ggplot(aes(Population, Capital_Expenditure)) +
  geom_point() +
  ggtitle("Population vs. Capital Expenditure") +
  xlab("Population") +
  ylab("Capital Expenditure")

# Residual diagnostics for mod3
# 1. Residuals against fitted values, with a dashed zero reference line.
data.frame(residuals = residuals(mod3), fitted = fitted(mod3)) %>%
  ggplot(aes(fitted, residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
  ggtitle("Residuals vs. Fitted") +
  xlab("Fitted Values") +
  ylab("Residuals")

# 2. Ten-bin histogram of the residuals.
data.frame(residuals = residuals(mod3)) %>%
  ggplot(aes(residuals)) +
  geom_histogram(bins = 10, colour = "black", fill = "skyblue") +
  ggtitle("Histogram of Residuals") +
  xlab("Residuals")

# 3. Normal Q-Q plot of the residuals with a reference line.
data.frame(residuals = residuals(mod3)) %>%
  ggplot(aes(sample = residuals)) +
  stat_qq() +
  stat_qq_line() +
  ggtitle("Q-Q Plot of Residuals ")

# Autocorrelation
# Durbin-Watson test on mod3's residuals against the alternative of positive autocorrelation.
# NOTE(review): the test depends on row order — presumably rows are sorted by Year; confirm.
dwtest(mod3)
## 
##  Durbin-Watson test
## 
## data:  mod3
## DW = 1.5868, p-value = 0.1604
## alternative hypothesis: true autocorrelation is greater than 0
# Homoscedasticity (Constant Variance of Residuals)
# Studentized Breusch-Pagan test on mod3; a small p-value would indicate non-constant residual variance.

bptest(mod3)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod3
## BP = 3.2215, df = 1, p-value = 0.07268
# Multicollinearity
#simple linear regression with one predictor(population), multicollinearity is not an issue.


# Multivariate Normality

# With a single predictor (Population), multivariate normality reduces to normality of the residuals, which the histogram and Q-Q plot above assess.

The scatter plot shows that as population increases, capital expenditure tends to increase as well. However, the relationship between them, though positive, is non-linear. There’s a cluster of points with lower population and lower capital expenditure, and another cluster with higher population and higher capital expenditure.

The histogram of the residuals is not symmetric, indicating that the residuals are not normally distributed. Linearity is also slightly violated. There is no evidence of autocorrelation: the residuals are uncorrelated. Homoscedasticity is satisfied: the residuals have constant variance. And since we are dealing with a simple linear regression with one predictor (population), multicollinearity is not an issue.

  • Transformations
# Natural-log versions of population and capital expenditure, stored as new columns.
Cleaned_AMA_Data[["Ln_Population"]] <- log(Cleaned_AMA_Data[["Population"]])
Cleaned_AMA_Data[["Ln_Capital_Expenditure"]] <- log(Cleaned_AMA_Data[["Capital_Expenditure"]])

# Transformed model: log-log regression, with the logs taken inside the formula.
mod4 <- lm(formula = log(Capital_Expenditure) ~ log(Population), data = Cleaned_AMA_Data)
summary(mod4)
# 
# Call:
# lm(formula = log(Capital_Expenditure) ~ log(Population), data = Cleaned_AMA_Data)
# 
# Residuals:
#      Min       1Q   Median       3Q      Max 
# -2.09346 -0.17567  0.05599  0.80484  0.85654 
# 
# Coefficients:
#                 Estimate Std. Error t value Pr(>|t|)  
# (Intercept)      -1.7563     5.8693  -0.299   0.7734  
# log(Population)   1.2423     0.4163   2.984   0.0204 *
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 1.004 on 7 degrees of freedom
# Multiple R-squared:  0.5598,  Adjusted R-squared:  0.4969 
# F-statistic: 8.903 on 1 and 7 DF,  p-value: 0.02041
#  Scatter Plots (Transformed Data)
# Log-log scatter with the fitted least-squares line and confidence band.
# labs() is chained onto the plot with `+`; as a separate statement it would
# only print a labels object and leave the plot without its title and axis labels.
ggplot(Cleaned_AMA_Data, aes(x = log(Population), y = log(Capital_Expenditure))) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Log(Population) vs. Log(Capital Expenditure)",
       x = "Log(Population)", y = "Log(Capital Expenditure)")
# $x
# [1] "Log(Population)"
# 
# $y
# [1] "Log(Capital Expenditure)"
# 
# $title
# [1] "Log(Population) vs. Log(Capital Expenditure)"
# 
# attr(,"class")
# [1] "labels"

After the transformation, the linear regression result is statistically significant, with a p-value of 0.0204 for log(Population). It shows a significant positive relationship between the log of population and the log of capital expenditure.

For every one-unit increase in log(Population), log(Capital_Expenditure) is expected to increase by 1.2423 units. This means a 1% increase in population corresponds to a 1.24% increase in capital expenditure. Multiple R-squared (0.5598) indicates that 55.98% of the variation in log of capital expenditure can be explained by the log of population.

# Pearson correlation test between log population and log capital expenditure.
# NOTE(review): Ln_Pop and Ln_Cap_Expenditure are not created in the visible script
# (which defines Ln_Population and Ln_Capital_Expenditure) — presumably they are
# pre-existing columns of Cleaned_AMA_Data; confirm they hold the same natural-log values.
cor.test(Cleaned_AMA_Data$Ln_Pop, Cleaned_AMA_Data$Ln_Cap_Expenditure)
## 
##  Pearson's product-moment correlation
## 
## data:  Cleaned_AMA_Data$Ln_Pop and Cleaned_AMA_Data$Ln_Cap_Expenditure
## t = 2.9838, df = 7, p-value = 0.02041
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1671597 0.9435049
## sample estimates:
##       cor 
## 0.7482184

The correlation between them is significant with correlation coefficient of 0.748, which is pretty strong.

# Per-capita capital expenditure: capital expenditure divided by population, row by row.
Cleaned_AMA_Data$Capital_Exp_Per_Capita <- with(
  Cleaned_AMA_Data,
  Capital_Expenditure / Population
)

# Plotting Trends

# Population by year: a line with highlighted points and comma-formatted y-axis.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year, y = Population)) +
  geom_line() +
  geom_point(colour = "dodgerblue") +
  scale_y_continuous(labels = comma) +
  labs(title = "Population Trend", x = "Year", y = "Population")

# Capital expenditure by year. The y-axis uses comma-formatted labels like the
# other trend plots, so large amounts are not shown in scientific notation.
# This plot has no secondary axis, so no right-hand axis-title theme tweak is needed.
ggplot(Cleaned_AMA_Data, aes(x = Year)) +
  geom_line(aes(y = Capital_Expenditure, color = "Capital Expenditure")) +
  geom_point(aes(y = Capital_Expenditure, color = "Capital Expenditure")) +
  labs(title = " Expenditure Trends", x = "Year", y = "Amount", color = "Type") +
  scale_y_continuous(labels = comma)

# Population and capital expenditure over time. Both series share the same
# numeric scale: the secondary axis uses the identity transform (~ .) and only
# adds a right-hand axis titled "Population".
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Population, colour = "Population")) +
  geom_point(aes(y = Population, colour = "Population")) +
  geom_line(aes(y = Capital_Expenditure, colour = "Capital Expenditure")) +
  geom_point(aes(y = Capital_Expenditure, colour = "Capital Expenditure")) +
  scale_y_continuous(
    labels = comma,
    sec.axis = sec_axis(~ ., name = "Population")
  ) +
  labs(
    title = "Population and Capital Expenditure Trends",
    x = "Year",
    y = "Amount",
    colour = "Type"
  ) +
  theme(axis.title.y.right = element_text(vjust = 2))

# Per Capita Analysis
# Mean per-capita capital expenditure, drawn below as a dashed red reference line.
average_capita <- mean(Cleaned_AMA_Data[["Capital_Exp_Per_Capita"]])

Cleaned_AMA_Data %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = Capital_Exp_Per_Capita, colour = "Capital Exp. Per Capita")) +
  geom_point(aes(y = Capital_Exp_Per_Capita, colour = "Capital Exp. Per Capita")) +
  geom_hline(yintercept = average_capita, linetype = "dashed", colour = "red") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Capital Expenditure Per Capita Over Time",
    x = "Year",
    y = "Ghana Cedis Per Capita",
    colour = "Type"
  )

1.4 What is the relationship between revenue growth and infrastructure delivery (Model)

Using total revenue growth rate and infrastructure delivery (capital expenditure per capita).

# Descriptive statistics
# skim() summary of the Capital_Exp_Per_Capita column.
Cleaned_AMA_Data %>% skim(Capital_Exp_Per_Capita)
Data summary
Name Piped data
Number of rows 9
Number of columns 83
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Capital_Exp_Per_Capita 0 1 7.18 5.05 0.73 3.08 5.64 12.8 13.58 ▇▅▂▁▇
# skim() summary of the TtRev_Growth_Rate column (it contains one missing value).
Cleaned_AMA_Data %>% skim(TtRev_Growth_Rate)
Data summary
Name Piped data
Number of rows 9
Number of columns 83
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
TtRev_Growth_Rate 1 0.89 -8.98 33.24 -81.19 -14.2 -1.61 5.94 29.62 ▂▁▂▇▃
# Histograms
# Ten-bin histogram of capital expenditure per capita.
Cleaned_AMA_Data %>%
  ggplot(aes(Capital_Exp_Per_Capita)) +
  geom_histogram(bins = 10, colour = "black", fill = "dodgerblue") +
  scale_x_continuous(labels = comma) +
  ggtitle("Distribution of Capital expenditure per capita") +
  xlab("Capital expenditure per capita")

# Ten-bin histogram of the total revenue growth rate.
# The values are already in percent units, so the labels use
# percent_format(scale = 1); plain `percent` would multiply them by 100.
ggplot(Cleaned_AMA_Data, aes(x = TtRev_Growth_Rate)) +
  geom_histogram(bins = 10, fill = "dodgerblue", color = "black") +
  labs(title = "Distribution of Total Revenue Growth Rate", x = "Total revenue growth rate") +
  scale_x_continuous(labels = percent_format(scale = 1))

# Plotting Trends

# Total revenue growth rate and capital expenditure per capita by year, with a
# dashed zero reference line. Both series share one numeric scale: the secondary
# axis uses the identity transform (~ .) and only adds a right-hand axis title.
Cleaned_AMA_Data %>%
  ggplot(aes(x = Year)) +
  geom_line(aes(y = TtRev_Growth_Rate, colour = "Total Revenue Growth Rate")) +
  geom_point(aes(y = TtRev_Growth_Rate, colour = "Total Revenue Growth Rate")) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
  geom_line(aes(y = Capital_Exp_Per_Capita, colour = "Capital Expenditure Per Capita")) +
  geom_point(aes(y = Capital_Exp_Per_Capita, colour = "Capital Expenditure Per Capita")) +
  scale_y_continuous(
    # scale = 1: the growth rate is already expressed in percent
    labels = percent_format(scale = 1),
    sec.axis = sec_axis(~ ., name = "Capital Expenditure Per Capita")
  ) +
  scale_color_manual(
    name = "Type",
    values = c(
      "Total Revenue Growth Rate" = "lightseagreen",
      "Capital Expenditure Per Capita" = "indianred"
    )
  ) +
  ggtitle("Total Revenue Growth Rate vs. Capital Expenditure Per Capita") +
  xlab("Year") +
  ylab("Total Revenue Growth Rate (%)") +
  theme(axis.title.y.right = element_text(vjust = 2))

The histograms show uneven distributions of total revenue growth rate and capital expenditure per capita. The total revenue growth rate histogram reveals two distinct clusters. The trend plots clearly show that the trend of total revenue growth rate (which experienced significant changes) is not directly linked to the trend of capital expenditure per capita (which remained stable).

1.4.1 Regression results

# Simple linear regression of capital expenditure per capita on total revenue growth rate.
# TtRev_Growth_Rate has one missing value, so lm() fits on the remaining rows.
mod5 <- lm(Capital_Exp_Per_Capita ~ TtRev_Growth_Rate, data = Cleaned_AMA_Data)
summary(mod5)
## 
## Call:
## lm(formula = Capital_Exp_Per_Capita ~ TtRev_Growth_Rate, data = Cleaned_AMA_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2716 -3.7391 -0.3963  4.0043  6.9698 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)        7.83859    2.01599   3.888  0.00809 **
## TtRev_Growth_Rate  0.05206    0.06229   0.836  0.43528   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.478 on 6 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1043, Adjusted R-squared:  -0.045 
## F-statistic: 0.6986 on 1 and 6 DF,  p-value: 0.4353
# Scatter of capital expenditure per capita against total revenue growth rate,
# with the fitted least-squares line and its confidence band (se = TRUE).
Cleaned_AMA_Data %>%
  ggplot(aes(TtRev_Growth_Rate, Capital_Exp_Per_Capita)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Revenue Growth vs. Capital Expenditure (Per Capita)") +
  xlab("Total Revenue Growth Rate (%)") +
  ylab("Capital Expenditure Per Capita")

The regression results show no statistically significant relationship between total revenue growth rate and infrastructure delivery (capital expenditure per capita): the p-value (0.43528) is greater than the 0.05 significance level. This means that changes in revenue growth do not significantly predict changes in capital expenditure per capita in this model. The R-squared (0.1043) indicates that only 10.43% of the variation in capital expenditure per capita can be explained by revenue growth (total revenue growth rate).

  • Transformations and correlation
# Transformed model: log-log specification of the same relationship.
# NOTE(review): log() is undefined for non-positive values, so years with a
# negative (or zero) revenue growth rate presumably become NaN and are dropped;
# the summary reports 6 observations deleted, leaving only 3 for the fit.
lm(log(Capital_Exp_Per_Capita) ~ log(TtRev_Growth_Rate), data = Cleaned_AMA_Data) %>% 
  summary()
# 
# Call:
# lm(formula = log(Capital_Exp_Per_Capita) ~ log(TtRev_Growth_Rate), 
#     data = Cleaned_AMA_Data)
# 
# Residuals:
#        2        4        7 
# -0.20616  0.30053 -0.09437 
# attr(,"label")
# [1] "Capital Expenditure"
# attr(,"format.spss")
# [1] "F8.0"
# 
# Coefficients:
#                        Estimate Std. Error t value Pr(>|t|)
# (Intercept)             -1.7036     0.5923  -2.876    0.213
# log(TtRev_Growth_Rate)   1.3160     0.2299   5.725    0.110
# 
# Residual standard error: 0.3765 on 1 degrees of freedom
#   (6 observations deleted due to missingness)
# Multiple R-squared:  0.9704,  Adjusted R-squared:  0.9408 
# F-statistic: 32.77 on 1 and 1 DF,  p-value: 0.1101
# Pearson correlation test between revenue growth and capital expenditure per
# capita. cor.test() has no `use` argument (that belongs to cor()); it already
# restricts the test to complete pairs, so the stray argument is dropped.
cor.test(Cleaned_AMA_Data$TtRev_Growth_Rate, Cleaned_AMA_Data$Capital_Exp_Per_Capita)
# 
#   Pearson's product-moment correlation
# 
# data:  Cleaned_AMA_Data$TtRev_Growth_Rate and Cleaned_AMA_Data$Capital_Exp_Per_Capita
# t = 0.8358, df = 6, p-value = 0.4353
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
#  -0.4942023  0.8371108
# sample estimates:
#      cor 
# 0.322932

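The log transformation and the correlation test still show non-significant results. Note that the log-transformed model is fitted on only 3 observations (6 were dropped, because the logarithm is undefined for non-positive growth rates), so its high R-squared should be interpreted with caution.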

1.5 What is the relationship between expenditure growth and infrastructure delivery?

  • Regression results using expenditure growth (Expenditure_Growth) and infrastructure delivery (capital expenditure per capita).
# Row-over-row percentage change in total expenditure (year-on-year if the rows
# are sorted by Year); the first row has no predecessor and is set to NA.
Cleaned_AMA_Data$Expenditure_Growth <- with(
  Cleaned_AMA_Data,
  c(NA, diff(Total_Expenditure) / Total_Expenditure[-length(Total_Expenditure)]) * 100
)

# OLS of capital expenditure per capita on expenditure growth.
mod6 <- lm(Capital_Exp_Per_Capita ~ Expenditure_Growth, data = Cleaned_AMA_Data)
summary(mod6)
## 
## Call:
## lm(formula = Capital_Exp_Per_Capita ~ Expenditure_Growth, data = Cleaned_AMA_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0062 -2.8932 -0.2143  1.9967  7.1834 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         7.50991    1.83291   4.097  0.00638 **
## Expenditure_Growth  0.08746    0.07122   1.228  0.26545   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.174 on 6 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2008, Adjusted R-squared:  0.06765 
## F-statistic: 1.508 on 1 and 6 DF,  p-value: 0.2654
  # Scatter of capital expenditure per capita against expenditure growth,
  # overlaid with a fitted linear trend (geom_smooth with method = "lm").
  Cleaned_AMA_Data %>%
    ggplot(mapping = aes(x = Expenditure_Growth, y = Capital_Exp_Per_Capita)) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE) +
    labs(
      title = "Expenditure Growth vs. Capital Expenditure (Per Capita)",
      x = "Expenditure Growth Rate (%)",
      y = "Capital Expenditure Per Capita"
    )

  # Semi-log specification: log of capital expenditure per capita regressed on
  # expenditure growth (the predictor is left untransformed).
  lm(log(Capital_Exp_Per_Capita) ~ Expenditure_Growth, data = Cleaned_AMA_Data) %>% 
  summary()
## 
## Call:
## lm(formula = log(Capital_Exp_Per_Capita) ~ Expenditure_Growth, 
##     data = Cleaned_AMA_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0133 -0.2399  0.1215  0.5026  1.1014 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         1.66341    0.37156   4.477  0.00421 **
## Expenditure_Growth  0.01226    0.01444   0.849  0.42829   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.049 on 6 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1073, Adjusted R-squared:  -0.04146 
## F-statistic: 0.7213 on 1 and 6 DF,  p-value: 0.4283

The linear regression results show no statistically significant relationship (p = 0.265), and even after the log transformation the results remain non-significant (p = 0.428).

2 SHEET 2

2.1 What is the relationship between allocative and funding decision-making and revenue patterns?

# no variables

2.2 What is the relationship between allocative decision-making and expenditure patterns?

  • No direct variables are available for this question; descriptive statistics of closely related variables are shown below.
# Expenditure composition:
#   CapExp_Pct       - capital expenditure as a percentage (0-100) of total expenditure
#   CapExp_Rev_Ratio - capital expenditure divided by total revenue (plain ratio)
Cleaned_AMA_Data <- Cleaned_AMA_Data %>%
  mutate(
    CapExp_Pct = (Capital_Expenditure / Total_Expenditure) * 100,
    CapExp_Rev_Ratio = (Capital_Expenditure / Total_Revenue)
  )



# Expenditure composition: capital expenditure as a share of total expenditure.
# CapExp_Pct is already expressed in percent (ratio * 100), so the axis labeller
# uses scale = 1; the default scale of 100 would inflate the labels 100-fold.
ggplot(Cleaned_AMA_Data, aes(x = Year, y = CapExp_Pct)) +
  geom_col(fill = "dodgerblue") +
  geom_point() +
  labs(title = "Capital Expenditure as Percentage of Total Expenditure",
       x = "Year",
       y = "Percentage") +
  scale_y_continuous(labels = percent_format(accuracy = 1, scale = 1))

# Total revenue and total expenditure over the years. Lines are coloured via
# the legend mapping; the point layers carry no colour mapping.
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Year)) +
  geom_line(mapping = aes(y = Total_Revenue, color = "Total Revenue")) +
  geom_point(mapping = aes(y = Total_Revenue)) +
  geom_line(mapping = aes(y = Total_Expenditure, color = "Total Expenditure")) +
  geom_point(mapping = aes(y = Total_Expenditure)) +
  labs(
    title = "Revenue and Expenditure Trends Over Years",
    x = "Year",
    y = "Amount (Ghana Cedis)",
    color = "Type"
  ) +
  scale_color_manual(
    values = c(
      "Total Revenue" = "blue",
      "Total Expenditure" = "red"
    )
  ) +
  scale_y_continuous(labels = comma)

# Combined trend plot: six revenue/expenditure series drawn as lines against Year.
# Each series is mapped to a constant colour label so that it appears in the
# legend; scale_color_manual() then assigns the actual colour per label.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for line
# thickness - confirm the installed version before switching.
ggplot(Cleaned_AMA_Data, aes(x = Year)) +
  geom_line(aes(y = Total_Revenue, color = "Total Revenue"), size = 1) +
  geom_line(aes(y = IGF, color = "IGF"), size = 1) +
  geom_line(aes(y = DACF, color = "DACF"), size = 1) +
  geom_line(aes(y = Capital_Expenditure, color = "Capital Expenditure"), size = 1) +
  geom_line(aes(y = Total_Expenditure, color = "Total Expenditure"), size = 1) +
  geom_line(aes(y = Others_Sources, color = "Other Sources"), size = 1) +
  labs(
    title = "Revenue and Expenditure Trends Over Years",
    x = "Year",
    y = "Amount (Ghana Cedis)",
    color = "Type"
  ) +
  # Label-to-colour lookup; names must match the labels used in aes() above.
  scale_color_manual(
    values = c(
      "Total Revenue" = "blue",
      "Other Sources" = "skyblue",
      "IGF" = "green",
      "DACF" = "darkgray",
      "Capital Expenditure" = "purple",
      "Total Expenditure" = "red"
    )
  ) +
  scale_y_continuous(labels = scales::comma) +
  # Legend on the right with a bold title; plot title centred and bold.
  theme(
    legend.position = "right", 
    legend.title = element_text(face = "bold"), 
    plot.title = element_text(hjust = 0.5, face = "bold") 
  )

# IGF_TE (IGF / total expenditure, per the axis label) over the years, with the
# y axis labelled as a percentage.
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Year, y = IGF_TE)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(size = 2.5) +
  labs(title = "IGF to Total Expenditure Ratio Over Years",
       x = "Year",
       y = "Ratio (IGF/Total Expenditure)") +
  scale_y_continuous(labels = percent_format(accuracy = 1))

# Capital expenditure to total revenue ratio (CapExp_Rev_Ratio) over the years.
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Year, y = CapExp_Rev_Ratio)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(size = 2.5) +
  labs(title = "Capital Expenditure to Total Revenue Ratio Over Years",
       x = "Year",
       y = "Ratio (Capital Expenditure/Total Revenue)") +
  scale_y_continuous(labels = comma)

# Pearson correlation test between total expenditure and total revenue
# (cor.test() defaults: Pearson method, two-sided alternative).
cor.test(Cleaned_AMA_Data$Total_Expenditure, Cleaned_AMA_Data$Total_Revenue)
## 
##  Pearson's product-moment correlation
## 
## data:  Cleaned_AMA_Data$Total_Expenditure and Cleaned_AMA_Data$Total_Revenue
## t = 23.708, df = 7, p-value = 0.00000006037
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9698015 0.9987517
## sample estimates:
##       cor 
## 0.9938305

In the plots above, capital expenditure as a percentage of total expenditure shows relatively high capital investment, peaking around 2018, followed by a sharp and sustained decline. There is also a strong correlation between Total Revenue and Total Expenditure (r = 0.99), with both peaking around 2017 and after.

2.3 What is the relationship between population trend, service delivery and revenue and expenditure patterns?

# Revenue per capita: each revenue series divided by Population.
Cleaned_AMA_Data <- Cleaned_AMA_Data %>%
  mutate(
    Total_Revenue_Per_Capita = Total_Revenue / Population,
    IGF_Per_Capita = IGF / Population,
    DACF_Per_Capita = DACF / Population
  )

# Time series plots

# Total revenue and expenditure trends: six series drawn against Year, each as
# a line plus points. Lines and points share the same constant colour label so
# that both follow the legend; scale_color_manual() assigns the colour per label.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for line
# thickness - confirm the installed version before switching.
ggplot(Cleaned_AMA_Data, aes(x = Year)) +
  geom_line(aes(y = Total_Revenue, color = "Total Revenue"), size = 1) +
  geom_point(aes(y = Total_Revenue, color = "Total Revenue")) +
  geom_line(aes(y = IGF, color = "IGF"), size = 1) +
  geom_point(aes(y = IGF, color = "IGF")) +
  geom_line(aes(y = DACF, color = "DACF"), size = 1) +
  geom_point(aes(y = DACF, color = "DACF")) +
  geom_line(aes(y = Capital_Expenditure, color = "Capital Expenditure"), size = 1) +
  geom_point(aes(y = Capital_Expenditure, color = "Capital Expenditure")) +
  geom_line(aes(y = Total_Expenditure, color = "Total Expenditure"), size = 1) +
  geom_point(aes(y = Total_Expenditure, color = "Total Expenditure")) +
  geom_line(aes(y = Others_Sources, color = "Other Sources"), size = 1) +
  geom_point(aes(y = Others_Sources, color = "Other Sources")) +
  labs(
    title = "Revenue and Expenditure Trends Over Years",
    x = "Year",
    y = "Amount (Ghana Cedis)",
    color = "Type"
  ) +
  # Label-to-colour lookup; names must match the labels used in aes() above.
  scale_color_manual(
    values = c(
      "Total Revenue" = "blue",
      "Other Sources" = "skyblue",
      "IGF" = "green",
      "DACF" = "darkgray",
      "Capital Expenditure" = "purple",
      "Total Expenditure" = "red"
    )
  ) +
  scale_y_continuous(labels = comma) +
  # Legend on the right with a bold title; plot title centred and bold.
  theme(
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Population over the years, as a line with point markers.
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Year, y = Population)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(size = 2.5) +
  labs(title = "Population Trend Over Years", x = "Year", y = "Population") +
  scale_y_continuous(labels = comma) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        axis.title = element_text(face = "bold"))

# IGF_TE (IGF / total expenditure, per the axis label) over the years, with a
# percentage-labelled y axis and bold, centred titles.
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Year, y = IGF_TE)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(size = 2.5) +
  labs(title = "IGF to Total Expenditure Ratio Over Years",
       x = "Year",
       y = "Ratio (IGF/Total Expenditure)") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        axis.title = element_text(face = "bold"))

# Per-capita revenue trends: total revenue, IGF and DACF per capita against
# Year, each drawn as a line plus points sharing one legend colour.
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Year)) +
  geom_line(mapping = aes(y = Total_Revenue_Per_Capita, color = "Total Revenue Per Capita")) +
  geom_point(mapping = aes(y = Total_Revenue_Per_Capita, color = "Total Revenue Per Capita")) +
  geom_line(mapping = aes(y = IGF_Per_Capita, color = "IGF Per Capita")) +
  geom_point(mapping = aes(y = IGF_Per_Capita, color = "IGF Per Capita")) +
  geom_line(mapping = aes(y = DACF_Per_Capita, color = "DACF Per Capita")) +
  geom_point(mapping = aes(y = DACF_Per_Capita, color = "DACF Per Capita")) +
  labs(
    title = "Revenue Per Capita trends",
    x = "Year",
    y = "Amount (Ghana Cedis)",
    color = "Type"
  ) +
  scale_y_continuous(labels = comma)

# Pairwise Pearson correlations among population, revenue and expenditure
# measures, computed on rows that are complete for all selected columns.
cor_matrix <- Cleaned_AMA_Data[
  c("Population", "Total_Revenue", "Total_Expenditure", "IGF_TE", "CapExp_Pct", "IGF")
] %>%
  cor(use = "complete.obs")
print(cor_matrix)
##                   Population Total_Revenue Total_Expenditure    IGF_TE
## Population         1.0000000     0.5363112         0.5632552 0.4315557
## Total_Revenue      0.5363112     1.0000000         0.9938305 0.5808075
## Total_Expenditure  0.5632552     0.9938305         1.0000000 0.5475533
## IGF_TE             0.4315557     0.5808075         0.5475533 1.0000000
## CapExp_Pct         0.6080252     0.7932208         0.8353768 0.6303333
## IGF                0.5482297     0.9341365         0.9195661 0.8180036
##                   CapExp_Pct       IGF
## Population         0.6080252 0.5482297
## Total_Revenue      0.7932208 0.9341365
## Total_Expenditure  0.8353768 0.9195661
## IGF_TE             0.6303333 0.8180036
## CapExp_Pct         1.0000000 0.8079598
## IGF                0.8079598 1.0000000
# Visualise the correlation matrix computed above.
# NOTE(review): corrplot() documents a `title` argument for the plot title;
# confirm that `main` renders as intended.
corrplot(cor_matrix, main = "Correlation matrix of population and expenditure patterns")

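In the correlation matrix above, there is a strong positive correlation between total revenue and total expenditure (r = 0.99), and IGF is also strongly correlated with both total revenue (r = 0.93) and total expenditure (r = 0.92).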

2.3.1 Regression Analysis

# Total Revenue vs Population: simple OLS of total revenue on population.
model_revenue_pop <- lm(Total_Revenue ~ Population, data = Cleaned_AMA_Data)
summary(model_revenue_pop)
## 
## Call:
## lm(formula = Total_Revenue ~ Population, data = Cleaned_AMA_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -29421174  -9145469  -1831067  15761302  30092080 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)  
## (Intercept) 40246399.97 17978138.90   2.239   0.0602 .
## Population        17.13       10.19   1.681   0.1366  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21800000 on 7 degrees of freedom
## Multiple R-squared:  0.2876, Adjusted R-squared:  0.1859 
## F-statistic: 2.826 on 1 and 7 DF,  p-value: 0.1366
# Total Expenditure vs Population: simple OLS of total expenditure on population.
model_expenditure_pop <- lm(Total_Expenditure ~ Population, data = Cleaned_AMA_Data)
summary(model_expenditure_pop)
## 
## Call:
## lm(formula = Total_Expenditure ~ Population, data = Cleaned_AMA_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -27719681 -11976200   -998826  17534296  24229751 
## 
## Coefficients:
##                 Estimate   Std. Error t value Pr(>|t|)  
## (Intercept) 37555261.801 17222106.762   2.181   0.0656 .
## Population        17.606        9.762   1.804   0.1143  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20880000 on 7 degrees of freedom
## Multiple R-squared:  0.3173, Adjusted R-squared:  0.2197 
## F-statistic: 3.253 on 1 and 7 DF,  p-value: 0.1143
# Capital Expenditure vs Total Revenue and IGF_TE: multiple OLS with capital
# expenditure as the response and two predictors.
model_capital_rev_igf <- lm(Capital_Expenditure ~ Total_Revenue + IGF_TE, data = Cleaned_AMA_Data)
summary(model_capital_rev_igf)
## 
## Call:
## lm(formula = Capital_Expenditure ~ Total_Revenue + IGF_TE, data = Cleaned_AMA_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -11115284  -1579096  -1347879   3150098   7196110 
## 
## Coefficients:
##                     Estimate     Std. Error t value Pr(>|t|)  
## (Intercept)   -17083481.3394   9933027.7839  -1.720   0.1362  
## Total_Revenue         0.3807         0.1123   3.391   0.0147 *
## IGF_TE          8799506.4086  25967153.1929   0.339   0.7463  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6247000 on 6 degrees of freedom
## Multiple R-squared:  0.7651, Adjusted R-squared:  0.6867 
## F-statistic: 9.769 on 2 and 6 DF,  p-value: 0.01297
# IGF_TE vs Population and Total Revenue: multiple OLS with the IGF_TE ratio as
# the response and two predictors.
model_igfte_pop_rev <- lm(IGF_TE ~ Population + Total_Revenue, data = Cleaned_AMA_Data)
summary(model_igfte_pop_rev)
## 
## Call:
## lm(formula = IGF_TE ~ Population + Total_Revenue, data = Cleaned_AMA_Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11347 -0.06445  0.02422  0.04453  0.11092 
## 
## Coefficients:
##                     Estimate     Std. Error t value Pr(>|t|)  
## (Intercept)   0.276691517224 0.104452816859   2.649   0.0381 *
## Population    0.000000023280 0.000000053550   0.435   0.6790  
## Total_Revenue 0.000000002121 0.000000001676   1.265   0.2528  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09669 on 6 degrees of freedom
## Multiple R-squared:  0.3576, Adjusted R-squared:  0.1434 
## F-statistic:  1.67 on 2 and 6 DF,  p-value: 0.2651
# Visualizations: one scatter plot per regression relationship, each overlaid
# with a fitted linear trend (geom_smooth with method = "lm").

# Scatter plot: Total Revenue vs Population
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Population, y = Total_Revenue)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Total Revenue vs Population",
    x = "Population",
    y = "Total Revenue"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)

# Scatter plot: Total Expenditure vs Population
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Population, y = Total_Expenditure)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Total Expenditure vs Population",
    x = "Population",
    y = "Total Expenditure"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)

# Scatter plot: Capital Expenditure vs Total Revenue
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Total_Revenue, y = Capital_Expenditure)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Capital Expenditure vs Total Revenue",
    x = "Total Revenue",
    y = "Capital Expenditure"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)

# Scatter plot: IGF_TE vs Population (y axis labelled as a percentage)
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Population, y = IGF_TE)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "IGF_TE vs Population",
    x = "Population",
    y = "IGF_TE"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent_format(accuracy = 1))

# Scatter plot: IGF_TE vs Total Revenue (y axis labelled as a percentage)
Cleaned_AMA_Data %>%
  ggplot(mapping = aes(x = Total_Revenue, y = IGF_TE)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "IGF_TE vs Total Revenue",
    x = "Total Revenue",
    y = "IGF_TE"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent_format(accuracy = 1))

In the regression results above, we found no significant relationship between Total Revenue and Population (p = 0.137) or between Total Expenditure and Population (p = 0.114). In the model of Capital Expenditure on Total Revenue and IGF_TE, Total Revenue was a significant predictor (p = 0.0147), while IGF_TE was not (p = 0.746). In the model of IGF_TE on Population and Total Revenue, neither predictor was significant (p = 0.679 and p = 0.253).

2.4 What is the relationship between service delivery and revenue and expenditure patterns?

# no variables

2.5 SHEET 3

2.6 SHEET 3